package com.lzp.web.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

/**
 * Demonstrates XPath queries against a jsoup {@link Document} by wrapping it in a
 * {@link JXDocument}.
 *
 * <p>XPath syntax reference: https://www.w3cschool.cn/xpath/xpath-syntax.html
 *
 * <p>Requires an additional jar on the classpath: the XPath classes used here come from
 * {@code org.seimicrawler.xpath}.
 *
 * @author lzp
 * @date 2020/05/11
 */
public class JsoupParseXMLByXpath {
    /** Classpath location of the sample document that {@link #main} queries. */
    private static final String STUDENT_XML = "com/lzp/xml/jsoup/student.xml";

    /**
     * Loads the sample document and prints the matches of four XPath queries,
     * separating each query's output with a blank line.
     *
     * @param args unused
     * @throws IOException if the sample document is missing from the classpath or cannot be read
     */
    public static void main(String[] args) throws IOException {
        Document document = loadDocument(STUDENT_XML);

        // The Document is not queried with XPath directly; it is wrapped in a JXDocument first.
        JXDocument jxDocument = JXDocument.create(document);

        // 1. All name elements anywhere in the document.
        printMatches(jxDocument, "//name");
        System.out.println();

        // 2. name elements that are direct children of a student element.
        printMatches(jxDocument, "//student/name");
        System.out.println();

        // 3. name elements under student that carry an id attribute.
        printMatches(jxDocument, "//student/name[@id]");
        System.out.println();

        // 4. sex elements under student whose class attribute equals 'coder'.
        printMatches(jxDocument, "//student/sex[@class='coder']");
    }

    /**
     * Parses a classpath resource into a jsoup document.
     *
     * <p>The resource is read as a stream rather than through {@code URL.getPath()}: that path
     * is URL-encoded (a space becomes {@code %20}), so turning it into a {@link java.io.File}
     * fails for directories with spaces or non-ASCII names, and it cannot address a resource
     * packaged inside a jar.
     *
     * @param resourceName classpath-relative name of the resource
     * @return the parsed document
     * @throws IOException if the resource does not exist or cannot be read
     */
    private static Document loadDocument(String resourceName) throws IOException {
        ClassLoader loader = JsoupParseXMLByXpath.class.getClassLoader();
        // A null resource is legal in try-with-resources; it is simply not closed.
        try (InputStream in = loader.getResourceAsStream(resourceName)) {
            if (in == null) {
                throw new IOException("Classpath resource not found: " + resourceName);
            }
            // NOTE(review): this overload uses jsoup's HTML parser, as the original File-based
            // call did. Switching to the XML parser would change the resulting tree, so it is
            // deliberately left as is -- confirm whether XML parsing is actually wanted.
            return Jsoup.parse(in, "utf-8", "");
        }
    }

    /**
     * Prints every node selected by {@code xpath}, one per line, to standard output.
     *
     * @param jxDocument document to query
     * @param xpath      XPath expression to evaluate
     */
    private static void printMatches(JXDocument jxDocument, String xpath) {
        List<JXNode> nodes = jxDocument.selN(xpath);
        for (JXNode node : nodes) {
            System.out.println(node);
        }
    }
}
